* Load the analysis data set and order observations by cc05.
use china_analyze_me2.dta, clear

sort cc05

* Region dummies (east / middle / west), keyed on province05.
* Each dummy is 1 for the listed province codes, 0 for every other row whose
* cc05 is not system-missing, and stays missing otherwise.
gen east = 1 if inlist(province05, 11, 12, 31, 13, 44, 45, 32, 33, 46, 37, 21, 35)
replace east = 0 if east == . & cc05 != .

gen middle = 1 if inlist(province05, 42, 41, 34, 23, 43, 36, 22, 15, 14)
replace middle = 0 if middle == . & cc05 != .

gen west = 1 if inlist(province05, 50, 51, 52, 53, 54, 61, 62, 63, 64, 65)
replace west = 0 if west == . & cc05 != .

* Dummy for the 4 provincial-level cities (identified by their cc05 codes).
gen province_city = 1 if inlist(cc05, 110000, 120000, 310000, 500000)
replace province_city = 0 if province_city == . & cc05 != .


* Dummy for provincial capitals: not one of the 4 provincial-level cities and
* cc05 equals province05*10000 + 100.
gen province_capital = 1 if province_city == 0 & cc05 - province05*10000 == 100
replace province_capital = 0 if province_capital == . & cc05 != .

* Dummy for either a provincial capital or one of the 4 provincial-level cities.
gen province_capitalPlus = 1 if inlist(cc05, 110000, 120000, 310000, 500000) | cc05 - province05*10000 == 100
replace province_capitalPlus = 0 if province_capitalPlus == . & cc05 != .


* Province identifier that pools the 4 provincial-level cities into one group
* (code 1); every other row keeps its province05 value.
gen pv = cond(inlist(cc05, 110000, 120000, 310000, 500000), 1, province05)


* Natural logs of the 4 baseline control variables.
gen larea_cc90     = ln(cc90_area)
gen larea_pf05     = ln(pf05_area)
gen la102_pref1990 = ln(a102_pref1990)
gen la120_pref1990 = ln(a120_pref1990)



* Natural log of a geographic variable (km2coast).
gen lkm2coast = ln(km2coast)


* Natural log of a weather variable (precipitation).
gen lprecipitation = ln(precipitation)

* Natural logs of the public transportation variables (1991 and 2005).

gen lbustrol1991_pref = ln(bus_trol_pref1991)
gen lbustrol2005_pref = ln(bus_trol_pref2005)



**********************************************************************************************************
* GDP variables gdpXXXX_YY: XXXX is the year; YY = cp, pref, or pfcp (outer ring, i.e. pref minus cp).
* The *_df versions are the same series divided by gdpdeflator_XXXX
* (per the original note, a provincial-level GDP deflator).
**********************************************************************************************************
* Rescale the 2010 deflator by the 2005 deflator (divided by 100).
replace gdpdeflator_2010 = gdpdeflator_2005 * gdpdeflator_2010 / 100

* --- 1990, cp: take the first non-missing source in the order listed. ---
* The last two sources (gdp_predf_*) only include imputed suburban portions of CPs.
gen gdp1990_cp = gdp_py_cp901990
foreach src in gdp_py_cp90a1990 gdp_cp90a1990 gdp_cp901990 gdp_predf_cp901990 gdp_predf_cp90a1990 {
    replace gdp1990_cp = `src' if gdp1990_cp == .
}

summarize gdp1990_cp
gen lgdp1990_cp = ln(gdp1990_cp)
gen gdp1990_cp_df = gdp1990_cp / gdpdeflator_1990

* --- 1990, pref: the fallbacks are (almost) fully imputed, but are not used
* for regressions, so they are included. The flag marks filled-in rows. ---
gen gdp1990_pref = gdp_py_pref1990
foreach src in gdp_predc_prefb1990 gdp_predc_prefa1990 {
    replace gdp1990_pref = `src' if gdp1990_pref == .
}
gen gdp1990_pref_imputeflag = (gdp_py_pref1990 == . & gdp1990_pref != .)

summarize gdp1990_pref
gen gdp1990_pref_df = gdp1990_pref / gdpdeflator_1990

gen gdp1990_pfcp = gdp1990_pref - gdp1990_cp
gen gdp1990_pfcp_df = gdp1990_pref_df - gdp1990_cp_df

* Conditions reused below: rows with a 1990 cp value of a102, and the rows
* flagged promoted_ccity1990 (plus cc05 131100).
local has1990 "a102_cp901990 != ."
local promoted "(promoted_ccity1990 == 1 | cc05 == 131100)"

* --- 2000, cp. The promoted rows are re-assigned after the first three
* statements, so their chain (michigan, michigan a, cp90a) takes precedence. ---
gen gdp2000_cp = gdp_michigan_cp902000 if `has1990'
replace gdp2000_cp = gdp_cp902000 if `has1990' & gdp_michigan_cp902000 == .
replace gdp2000_cp = gdp_cp90a2000 if `has1990' & gdp_michigan_cp902000 == . & gdp_cp902000 == .
replace gdp2000_cp = gdp_michigan_cp902000 if `promoted'
replace gdp2000_cp = gdp_michigan_cp90a2000 if `promoted' & gdp_michigan_cp902000 == .
replace gdp2000_cp = gdp_cp90a2000 if `promoted' & gdp_michigan_cp902000 == . & gdp_michigan_cp90a2000 == .
summarize gdp2000_cp
gen gdp2000_cp_df = gdp2000_cp / gdpdeflator_2000

* --- 2000, pref ---
gen gdp2000_pref = gdp_michigan_pref2000 if `has1990'
replace gdp2000_pref = gdp_michigan_pref2000 if `promoted'
replace gdp2000_pref = gdp_michigan_prefa2000 if `promoted' & gdp_michigan_pref2000 == .
replace gdp2000_pref = gdp_michigan_prefb2000 if `promoted' & gdp_michigan_pref2000 == . & gdp_michigan_prefa2000 == .

summarize gdp2000_pref
gen gdp2000_pref_df = gdp2000_pref / gdpdeflator_2000

gen gdp2000_pfcp = gdp2000_pref - gdp2000_cp
gen gdp2000_pfcp_df = gdp2000_pref_df - gdp2000_cp_df

* --- 2005, cp (same layout as 2000, without a michigan "a" fallback) ---
gen gdp2005_cp = gdp_michigan_cp902005 if `has1990'
replace gdp2005_cp = gdp_cp902005 if `has1990' & gdp_michigan_cp902005 == .
replace gdp2005_cp = gdp_cp90a2005 if `has1990' & gdp_michigan_cp902005 == . & gdp_cp902005 == .
replace gdp2005_cp = gdp_michigan_cp902005 if `promoted'
replace gdp2005_cp = gdp_cp90a2005 if `promoted' & gdp_michigan_cp902005 == .
summarize gdp2005_cp
gen gdp2005_cp_df = gdp2005_cp / gdpdeflator_2005

* --- 2005, pref ---
gen gdp2005_pref = gdp_michigan_pref2005
gen gdp2005_pref_df = gdp2005_pref / gdpdeflator_2005

gen gdp2005_pfcp = gdp2005_pref - gdp2005_cp
gen gdp2005_pfcp_df = gdp2005_pref_df - gdp2005_cp_df

* --- 2010, cp: these can include some predicted suburban portions. ---
gen gdp2010_cp = gdp_michigan_cp902010
replace gdp2010_cp = gdp_michigan_cp90a2010 if gdp2010_cp == .
* Last resort: values imputed by apportioning within CP numbers to urban
* districts based on manufacturing employment.
replace gdp2010_cp = gdp_predc_cp90a2010 if gdp2010_cp == .

summarize gdp2010_cp
gen gdp2010_cp_df = gdp2010_cp / gdpdeflator_2010

* --- 2010, pref ---
gen gdp2010_pref = gdp_michigan_prefa2010
replace gdp2010_pref = gdp_michigan_prefb2010 if gdp2010_pref == .
gen gdp2010_pref_df = gdp2010_pref / gdpdeflator_2010

gen gdp2010_pfcp = gdp2010_pref - gdp2010_cp
gen gdp2010_pfcp_df = gdp2010_pref_df - gdp2010_cp_df

**********************************************************************************************************
* GDP sector 2 variables gdpsect2_XXXX_YY: XXXX is the year; YY = cp, pref, or pfcp (outer ring, pref minus cp).
* The *_df versions are the same series divided by gdpdeflator_XXXX
* (per the original note, a provincial-level GDP deflator).
**********************************************************************************************************

* --- 1990, cp: first non-missing source in the order listed. The gdp_sect2_predf_*
* sources only include imputed suburban portions, so they are kept. ---
gen gdpsect2_1990_cp = gdp_sect2_cp901990
foreach src in gdp_sect2_cp90a1990 gdp_sect2_predf_cp901990 gdp_sect2_predf_cp90a1990 {
    replace gdpsect2_1990_cp = `src' if gdpsect2_1990_cp == .
}

gen gdpsect2_1990_cp_df = gdpsect2_1990_cp / gdpdeflator_1990

* --- 1990, pref: the fallbacks are (almost) fully imputed, but they are not
* used for regressions. The flag marks filled-in rows. ---
gen gdpsect2_1990_pref = gdp_sect2_pref1990
foreach src in gdp_sect2_predc_prefb1990 gdp_sect2_predc_prefa1990 {
    replace gdpsect2_1990_pref = `src' if gdpsect2_1990_pref == .
}
gen gdpsect2_1990_pref_imputeflag = (gdp_sect2_pref1990 == . & gdpsect2_1990_pref != .)

gen gdpsect2_1990_pref_df = gdpsect2_1990_pref / gdpdeflator_1990

gen gdpsect2_1990_pfcp = gdpsect2_1990_pref - gdpsect2_1990_cp
gen gdpsect2_1990_pfcp_df = gdpsect2_1990_pref_df - gdpsect2_1990_cp_df

* --- 2000 and 2005 share one layout: the "mi" series first, then the plain
* series, then the a (and, for pref, b) variants. ---
foreach yr in 2000 2005 {
    gen gdpsect2_`yr'_cp = gdp_sector2_mi_cp90`yr'
    replace gdpsect2_`yr'_cp = gdp_sector2_cp90`yr' if gdp_sector2_mi_cp90`yr' == .
    replace gdpsect2_`yr'_cp = gdp_sector2_cp90a`yr' if gdp_sector2_mi_cp90`yr' == . & gdp_sector2_cp90`yr' == .
    gen gdpsect2_`yr'_cp_df = gdpsect2_`yr'_cp / gdpdeflator_`yr'

    gen gdpsect2_`yr'_pref = gdp_sector2_mi_pref`yr'
    replace gdpsect2_`yr'_pref = gdp_sector2_pref`yr' if gdp_sector2_mi_pref`yr' == .
    replace gdpsect2_`yr'_pref = gdp_sector2_prefa`yr' if gdp_sector2_mi_pref`yr' == . & gdp_sector2_pref`yr' == .
    replace gdpsect2_`yr'_pref = gdp_sector2_prefb`yr' if gdp_sector2_mi_pref`yr' == . & gdp_sector2_pref`yr' == . & gdp_sector2_prefa`yr' == .
    gen gdpsect2_`yr'_pref_df = gdpsect2_`yr'_pref / gdpdeflator_`yr'

    gen gdpsect2_`yr'_pfcp = gdpsect2_`yr'_pref - gdpsect2_`yr'_cp
    gen gdpsect2_`yr'_pfcp_df = gdpsect2_`yr'_pref_df - gdpsect2_`yr'_cp_df
}

* --- 2010, cp: the fallback is imputed by apportioning within CP numbers to
* urban districts based on manufacturing employment. ---
gen gdpsect2_2010_cp = gdp_sector2_mi_cp902010
replace gdpsect2_2010_cp = gdp_sect2_predc_cp90a2010 if gdpsect2_2010_cp == .
gen gdpsect2_2010_cp_df = gdpsect2_2010_cp / gdpdeflator_2010

* --- 2010, pref ---
gen gdpsect2_2010_pref = gdp_sector2_mi_pref2010
gen gdpsect2_2010_pref_df = gdpsect2_2010_pref / gdpdeflator_2010

gen gdpsect2_2010_pfcp = gdpsect2_2010_pref - gdpsect2_2010_cp
gen gdpsect2_2010_pfcp_df = gdpsect2_2010_pref_df - gdpsect2_2010_cp_df


***************************************************************************************************************
* Light data for the outer ring: f*_pfcp = f*_pf - f*_cp90 (i.e. pref minus cp).
***************************************************************************************************************
foreach series in f101992 f121995 f142000 f152000 f152005 f162005 f162009 {
    gen `series'_pfcp = `series'_pf - `series'_cp90
}




**********************************************************************************************************
* Census population variables censuspopXXXX_YY: XXXX is the year; YY = cp, pref, or pfcp (outer ring, pref minus cp).
* lcensuspopXXXX_YY is the natural log of censuspopXXXX_YY.
**********************************************************************************************************
* cp-level population. For 1990 the first non-missing of three sources is used.
gen censuspop1982_cp = c_totalPop_cp90a1982
gen censuspop1990_cp = a102_cp901990
foreach src in a102_cp90a1990 c_totalPop_cp90a1990 {
    replace censuspop1990_cp = `src' if censuspop1990_cp == .
}
summarize censuspop1990_cp

gen censuspop2000_cp = c_totalPop_cp90a2000
gen censuspop2005_cp = c_totalPop_cp90a2005
gen censuspop2010_cp = a102_cp90a2010
summarize censuspop*

* pref-level population.
gen censuspop1982_pref = c_totalPop_prefa1982
gen censuspop1990_pref = a102_pref1990

gen censuspop2000_pref = c_totalPop_prefa2000
gen censuspop2005_pref = c_totalPop_prefa2005
gen censuspop2010_pref = a102_prefa2010


summarize censuspop*

* Outer-ring population: pref minus cp.
foreach yr in 1982 1990 2000 2005 2010 {
    gen censuspop`yr'_pfcp = censuspop`yr'_pref - censuspop`yr'_cp
}


* Logs. Only 1982 and 1990 get a logged outer-ring (pfcp) series.
foreach yr in 1982 1990 {
    foreach area in cp pref pfcp {
        gen lcensuspop`yr'_`area' = ln(censuspop`yr'_`area')
    }
}

foreach yr in 2000 2005 2010 {
    foreach area in cp pref {
        gen lcensuspop`yr'_`area' = ln(censuspop`yr'_`area')
    }
}




*********************************************
* Log GDP per capita, 1990: ln(gdp1990 / censuspop1990) at the cp and pref levels.
* Uses the undeflated gdp1990_* series.
**********************************************
foreach area in cp pref {
    gen lgdp_pc_1990_`area' = ln(gdp1990_`area' / censuspop1990_`area')
}



*****************************************************************************************************************
* Composition of population
* highedu*          : population aged 19 to 55 with high school or above education
* c_totalpop19to55* : total population aged 19 to 55
*****************************************************************************************************************

* First build the 1990 census components. For every item aNNN the cp value is
* aNNN_cp901990, falling back to aNNN_cp90a1990 where the former is missing.
* Where created, *_pref copies aNNN_pref1990, *_pfcp is pref minus cp, and
* an l-prefixed variable is the natural log.

foreach code in 264 267 270 273 {
    gen a`code'1990_cp = a`code'_cp901990
    replace a`code'1990_cp = a`code'_cp90a1990 if a`code'_cp901990 == .
}

gen a2611990_cp = a261_cp901990
replace a2611990_cp = a261_cp90a1990 if a261_cp901990 == .
gen a2611990_pref = a261_pref1990

gen a1201990_cp = a120_cp901990
replace a1201990_cp = a120_cp90a1990 if a120_cp901990 == .
gen a1201990_pref = a120_pref1990
gen a1201990_pfcp = a1201990_pref - a1201990_cp
gen la1201990_pfcp = ln(a1201990_pfcp)


gen a1021990_cp = a102_cp901990
replace a1021990_cp = a102_cp90a1990 if a102_cp901990 == .
gen a1021990_pref = a102_pref1990
gen la1021990_pref = ln(a1021990_pref)
gen a1021990_pfcp = a1021990_pref - a1021990_cp
gen la1021990_cp = ln(a1021990_cp)

foreach code in 362 366 {
    gen a`code'1990_cp = a`code'_cp901990
    replace a`code'1990_cp = a`code'_cp90a1990 if a`code'_cp901990 == .
}

* a381: emp
gen a3811990_cp = a381_cp901990
replace a3811990_cp = a381_cp90a1990 if a381_cp901990 == .
gen a3811990_pref = a381_pref1990
gen la3811990_pref = ln(a3811990_pref)
gen a3811990_pfcp = a3811990_pref - a3811990_cp

gen a3871990_cp = a387_cp901990
replace a3871990_cp = a387_cp90a1990 if a387_cp901990 == .
gen la3871990_cp = ln(a3871990_cp)
gen a3871990_pref = a387_pref1990
gen la3871990_pref = ln(a3871990_pref)
gen a3871990_pfcp = a3871990_pref - a3871990_cp

* a384: agri. emp
gen a3841990_cp = a384_cp901990
replace a3841990_cp = a384_cp90a1990 if a384_cp901990 == .
gen a3841990_pref = a384_pref1990
gen la3841990_pref = ln(a3841990_pref)
gen a3841990_pfcp = a3841990_pref - a3841990_cp

* a117: urban hukou
gen a1171990_cp = a117_cp901990
replace a1171990_cp = a117_cp90a1990 if a117_cp901990 == .
gen a1171990_pref = a117_pref1990
gen la1171990_pref = ln(a1171990_pref)
gen a1171990_pfcp = a1171990_pref - a1171990_cp
gen la1171990_pfcp = ln(a1171990_pfcp)


foreach code in 192 195 198 {
    gen a`code'1990_cp = a`code'_cp901990
    replace a`code'1990_cp = a`code'_cp90a1990 if a`code'_cp901990 == .
    gen a`code'1990_pref = a`code'_pref1990
    gen a`code'1990_pfcp = a`code'1990_pref - a`code'1990_cp
}


gen a3961990_cp = a396_cp901990
replace a3961990_cp = a396_cp90a1990 if a396_cp901990 == .
gen a3961990_pref = a396_pref1990
gen la3961990_pref = ln(a3961990_pref)
gen a3961990_pfcp = a3961990_pref - a3961990_cp



* Education composition. highedu* adds the female and male "h" counts for
* ages 19 to 55; c_totalpop19to55* adds the female and male totals for the
* same ages; frac_highedu* is their ratio; *_pfcp is pref minus cp.

* --- 2005 ---
gen highedu2005_cp = c_f_19to55_h_cp90a2005 + c_m_19to55_h_cp90a2005
gen highedu2005_pref = c_f_19to55_h_prefa2005 + c_m_19to55_h_prefa2005
gen highedu2005_pfcp = highedu2005_pref - highedu2005_cp

gen c_totalpop19to55_2005_cp = c_f_19to55_cp90a2005 + c_m_19to55_cp90a2005
gen c_totalpop19to55_2005_pref = c_f_19to55_prefa2005 + c_m_19to55_prefa2005
gen c_totalpop19to55_2005_pfcp = c_totalpop19to55_2005_pref - c_totalpop19to55_2005_cp

gen frac_highedu2005_pref = highedu2005_pref / c_totalpop19to55_2005_pref
gen frac_highedu2005_cp = highedu2005_cp / c_totalpop19to55_2005_cp

* cp share of pref census population, 1982 and 1990.
gen sh_pop1982_cppref = censuspop1982_cp / censuspop1982_pref
gen sh_pop1990_cppref = censuspop1990_cp / censuspop1990_pref

* --- 2000 ---
gen highedu2000_cp = c_f_19to55_h_cp90a2000 + c_m_19to55_h_cp90a2000
gen highedu2000_pref = c_f_19to55_h_prefa2000 + c_m_19to55_h_prefa2000
gen highedu2000_pfcp = highedu2000_pref - highedu2000_cp

gen c_totalpop19to55_2000_cp = c_f_19to55_cp90a2000 + c_m_19to55_cp90a2000
gen c_totalpop19to55_2000_pref = c_f_19to55_prefa2000 + c_m_19to55_prefa2000
gen c_totalpop19to55_2000_pfcp = c_totalpop19to55_2000_pref - c_totalpop19to55_2000_cp

gen frac_highedu2000_pref = highedu2000_pref / c_totalpop19to55_2000_pref
gen frac_highedu2000_cp = highedu2000_cp / c_totalpop19to55_2000_cp


* --- 1990: built from census items a264, a267, a270 and a273, with a261 as
* the denominator of the fraction. ---
gen highedu1990_cp = a2641990_cp + a2671990_cp + a2701990_cp + a2731990_cp
gen highedu1990_pref = a264_pref1990 + a267_pref1990 + a270_pref1990 + a273_pref1990

gen highedu1990_pfcp = highedu1990_pref - highedu1990_cp

gen frac_highedu1990_pref = highedu1990_pref / (a2611990_pref)
gen frac_highedu1990_cp = highedu1990_cp / (a2611990_cp)
gen sh_highedu1990_cppref = highedu1990_cp / highedu1990_pref

* --- 1982 ---
gen highedu1982_cp = c_f_19to55_h_cp90a1982 + c_m_19to55_h_cp90a1982
gen highedu1982_pref = c_f_19to55_h_prefa1982 + c_m_19to55_h_prefa1982
gen highedu1982_pfcp = highedu1982_pref - highedu1982_cp
gen lhighedu1982_cp = ln(highedu1982_cp)
gen lhighedu1982_pref = ln(highedu1982_pref)

gen c_totalpop19to55_1982_cp = c_f_19to55_cp90a1982 + c_m_19to55_cp90a1982
gen c_totalpop19to55_1982_pref = c_f_19to55_prefa1982 + c_m_19to55_prefa1982
gen c_totalpop19to55_1982_pfcp = c_totalpop19to55_1982_pref - c_totalpop19to55_1982_cp

gen frac_highedu1982_pref = highedu1982_pref / c_totalpop19to55_1982_pref
gen frac_highedu1982_cp = highedu1982_cp / c_totalpop19to55_1982_cp
gen sh_highedu1982_cppref = highedu1982_cp / highedu1982_pref

************************************************************************************************************************************
* Road and rail variables
* s_road_XXXX_ringda = road_XXXX_ringd2a + road_XXXX_ringd3a + road_XXXX_ringd4a
* po_s_road_XXXX_ringda is a dummy equal to 1 when s_road_XXXX_ringda > 0
************************************************************************************************************************************
* Outer-ring kilometres: pf05 minus cp90.
foreach yr in 2010 2005 1999 {
    gen all_road_`yr'_km_pfcp = all_road_`yr'_km_pf05 - all_road_`yr'_km_cp90
}
foreach yr in 1980 1962 1924 {
    gen road_`yr'_km_pfcp = road_`yr'_km_pf05 - road_`yr'_km_cp90
}

foreach yr in 1924 1962 1980 1990 1999 2005 2010 {
    gen rail_`yr'_km_pfcp = rail_`yr'_km_pf05 - rail_`yr'_km_cp90
}

* Combined road + rail rays.
foreach yr in 1924 1962 1980 {
    gen road_rail_`yr'_rays = road_`yr'_rays + rail_`yr'_rays
}
foreach yr in 1999 2005 2010 {
    gen all_road_rail_`yr'_rays = all_road_`yr'_rays + rail_`yr'_rays
}

* Combined road + rail kilometres at the pref, cp and outer-ring levels.
foreach yr in 1962 1980 1924 {
    gen road_rail_`yr'_km_pref = road_`yr'_km_pf05 + rail_`yr'_km_pf05
}
foreach yr in 1962 1980 1924 {
    gen road_rail_`yr'_km_cp = road_`yr'_km_cp90 + rail_`yr'_km_cp90
}
foreach yr in 1962 1980 1924 {
    gen road_rail_`yr'_km_pfcp = road_rail_`yr'_km_pref - road_rail_`yr'_km_cp
}

* Logged road kilometres. Only the outer-ring (pfcp) series has 1 added
* before the log is taken.
foreach yr in 2005 2010 1999 {
    gen lall_road_`yr'_km_pf05 = ln(all_road_`yr'_km_pf05)
    gen lall_road_`yr'_km_pfcp = ln(all_road_`yr'_km_pfcp + 1)
    gen lall_road_`yr'_km_cp90 = ln(all_road_`yr'_km_cp90)
}

foreach yr in 1962 1980 {
    gen lroad_`yr'_km_pf05 = ln(road_`yr'_km_pf05)
    gen lroad_`yr'_km_pfcp = ln(road_`yr'_km_pfcp + 1)
    gen lroad_`yr'_km_cp90 = ln(road_`yr'_km_cp90)
}


* plan5_7 kilometres: logs, plus a dummy that is 1 when the outer-ring length
* (pf05 minus cp90) is positive, 0 when it is zero or negative, and missing
* when that difference is missing.
gen lplan5_7_km_cp90 = ln(plan5_7_km_cp90)
gen lplan5_7_km_pf05 = ln(plan5_7_km_pf05)
gen lplan5_7_km_pfcp = ln(plan5_7_km_pf05 - plan5_7_km_cp90 + 1)
gen po_plan5_7_km_pfcp = 1 if plan5_7_km_pf05 - plan5_7_km_cp90 != .
replace po_plan5_7_km_pfcp = 0 if plan5_7_km_pf05 - plan5_7_km_cp90 <= 0


* Logged rail kilometres; every rail series is logged as ln(x + 1).
foreach yr in 2010 2005 1999 1990 1962 1980 {
    gen lrail_`yr'_km_pf05 = ln(rail_`yr'_km_pf05 + 1)
}
gen lrail_1962_km_pfcp = ln(rail_1962_km_pfcp + 1)
gen lrail_1962_km_cp90 = ln(rail_1962_km_cp90 + 1)

foreach yr in 1990 2005 2010 1999 {
    gen lrail_`yr'_km_pfcp = ln(rail_`yr'_km_pfcp + 1)
}

* Logged combined road + rail kilometres at the pref level.
foreach yr in 1962 1980 {
    gen lroad_rail_`yr'_km_pref = ln(road_rail_`yr'_km_pref)
}
	



* Ring sums and indicator dummies for the all_road series (1999, 2005, 2010)
* and the road series (1924, 1962, 1980). For every series:
*   s_*_ringd, s_*_ringxd, s_*_ringda, s_*_ringxda : sum of rings 2, 3 and 4
*   po_s_*_ringda, po_s_*_ringxda : 1 if the sum is positive, 0 if it is
*       exactly zero, missing otherwise
*   po_s_*_ringdaG15, po_s_*_ringxdaG15 : 1 if ring 3 or ring 4 is positive
*       and the ring sum is not missing, 0 if rings 3 and 4 are both zero
foreach series in all_road_1999 all_road_2005 all_road_2010 road_1924 road_1962 road_1980 {

    gen s_`series'_ringd = `series'_ringd2 + `series'_ringd3 + `series'_ringd4
    gen s_`series'_ringxd = `series'_ringXd2 + `series'_ringXd3 + `series'_ringXd4
    gen s_`series'_ringda = `series'_ringd2A + `series'_ringd3A + `series'_ringd4A
    gen s_`series'_ringxda = `series'_ringXd2A + `series'_ringXd3A + `series'_ringXd4A

    gen po_s_`series'_ringda = 1 if s_`series'_ringda > 0 & s_`series'_ringda != .
    replace po_s_`series'_ringda = 0 if s_`series'_ringda == 0
    gen po_s_`series'_ringxda = 1 if s_`series'_ringxda > 0 & s_`series'_ringxda != .
    replace po_s_`series'_ringxda = 0 if s_`series'_ringxda == 0

    gen po_s_`series'_ringdaG15 = 1 if (`series'_ringd3A > 0 | `series'_ringd4A > 0) & s_`series'_ringda != .
    replace po_s_`series'_ringdaG15 = 0 if `series'_ringd3A == 0 & `series'_ringd4A == 0

    gen po_s_`series'_ringxdaG15 = 1 if (`series'_ringXd3A > 0 | `series'_ringXd4A > 0) & s_`series'_ringxda != .
    replace po_s_`series'_ringxdaG15 = 0 if `series'_ringXd3A == 0 & `series'_ringXd4A == 0

}

* Rename the 2010 big_road variables to the hhroad prefix.
rename big_road_2010_rays hhroad_2010_rays
rename big_road_2010_km_pf05 hhroad_2010_km_pf05
rename big_road_2010_km_cp90 hhroad_2010_km_cp90


* Outer-ring hhroad kilometres (pf05 minus cp90) and ln(x + 1) transforms.
gen hhroad_2010_km_pfcp = hhroad_2010_km_pf05 - hhroad_2010_km_cp90
gen lhhroad_2010_km_pfcp = ln(hhroad_2010_km_pfcp + 1)
gen lhhroad_2010_km_pf05 = ln(hhroad_2010_km_pf05 + 1)


* Rename the A-suffixed ring variables; the new names are all lower case.
foreach ring in 1 2 3 4 {
    rename big_road_2010_ringd`ring'A hhroad_2010_ringd`ring'a
}

foreach ring in 1 2 3 4 {
    rename big_road_2010_ringXd`ring'A hhroad_2010_ringxd`ring'a
}



* Ring sums and indicator dummies for hhroad 2010, same construction as for
* the other road series.
* NOTE(review): the first two sums read hhroad_2010_ringd2/3/4 and
* hhroad_2010_ringxd2/3/4, but only the a-suffixed ring variables are renamed
* in this section. Unless those names already exist in the data, Stata may
* resolve them by variable abbreviation to the a-suffixed variables, or stop
* with an error if abbreviation is switched off. Confirm against the data set.
local series hhroad_2010

gen s_`series'_ringd = `series'_ringd2 + `series'_ringd3 + `series'_ringd4
gen s_`series'_ringxd = `series'_ringxd2 + `series'_ringxd3 + `series'_ringxd4
gen s_`series'_ringda = `series'_ringd2a + `series'_ringd3a + `series'_ringd4a
gen s_`series'_ringxda = `series'_ringxd2a + `series'_ringxd3a + `series'_ringxd4a

* 1 if the ring sum is positive, 0 if it is exactly zero, missing otherwise.
gen po_s_`series'_ringda = 1 if s_`series'_ringda > 0 & s_`series'_ringda != .
replace po_s_`series'_ringda = 0 if s_`series'_ringda == 0
gen po_s_`series'_ringxda = 1 if s_`series'_ringxda > 0 & s_`series'_ringxda != .
replace po_s_`series'_ringxda = 0 if s_`series'_ringxda == 0

* 1 if ring 3 or ring 4 is positive, 0 if both are zero.
gen po_s_`series'_ringdaG15 = 1 if (`series'_ringd3a > 0 | `series'_ringd4a > 0) & s_`series'_ringda != .
replace po_s_`series'_ringdaG15 = 0 if `series'_ringd3a == 0 & `series'_ringd4a == 0

gen po_s_`series'_ringxdaG15 = 1 if (`series'_ringxd3a > 0 | `series'_ringxd4a > 0) & s_`series'_ringxda != .
replace po_s_`series'_ringxdaG15 = 0 if `series'_ringxd3a == 0 & `series'_ringxd4a == 0

* Rail ring sums (rings 2 to 4, A-suffixed variables) for 1962 and 2010, with
* dummies that are 1 if the sum is positive, 0 if it is exactly zero, and
* missing otherwise.
foreach series in rail_1962 rail_2010 {

    gen s_`series'_ringda = `series'_ringd2A + `series'_ringd3A + `series'_ringd4A
    gen s_`series'_ringxda = `series'_ringXd2A + `series'_ringXd3A + `series'_ringXd4A

    gen po_s_`series'_ringda = 1 if s_`series'_ringda > 0 & s_`series'_ringda != .
    replace po_s_`series'_ringda = 0 if s_`series'_ringda == 0
    gen po_s_`series'_ringxda = 1 if s_`series'_ringxda > 0 & s_`series'_ringxda != .
    replace po_s_`series'_ringxda = 0 if s_`series'_ringxda == 0

}


*************************************************************************************************************************************
* Growth variables
* D_varXXZZ_YY = ln(varZZ_YY) - ln(varXX_YY), where XX is the starting year, ZZ the ending year, YY = cp, pref or pfcp.
* Note: if var has zero values, ln(var + 1) is used instead.
* Dlevel_varXXZZ_YY = varZZ_YY - varXX_YY, with the same XX, ZZ and YY.
*************************************************************************************************************************************
* --- Growth of light ---
gen D_f15_9205_cp = ln(f152005_cp90) - ln(f101992_cp90)

gen D_f15_9200_cp = ln(f152000_cp90) - ln(f101992_cp90)

gen D_f15_9200_pref = ln(f152000_pf) - ln(f101992_pf)
gen D_f15_9205_pref = ln(f152005_pf) - ln(f101992_pf)
* NOTE(review): despite the f15 / 9210 name, the end point is f162009_pf.
gen D_f15_9210_pref = ln(f162009_pf) - ln(f101992_pf)

* --- Growth of GDP since 1990 (end years 2005, 2000, 2010; cp and pref) ---
foreach endyr in 2005 2000 2010 {
    local yy = substr("`endyr'", 3, 2)
    foreach area in cp pref {
        gen D_gdp90`yy'_`area' = ln(gdp`endyr'_`area') - ln(gdp1990_`area')
    }
}

gen D_gdp0010_cp = ln(gdp2010_cp) - ln(gdp2000_cp)
gen D_gdp0510_cp = ln(gdp2010_cp) - ln(gdp2005_cp)



* --- Growth of sector 2 GDP since 1990, same layout ---
foreach endyr in 2005 2000 2010 {
    local yy = substr("`endyr'", 3, 2)
    foreach area in cp pref {
        gen D_gdpsect2_90`yy'_`area' = ln(gdpsect2_`endyr'_`area') - ln(gdpsect2_1990_`area')
    }
}
gen D_gdpsect2_0010_cp = ln(gdpsect2_2010_cp) - ln(gdpsect2_2000_cp)

* --- Growth of population ---
gen D_censuspop0010_cp = ln(censuspop2010_cp) - ln(censuspop2000_cp)
gen D_censuspop8290_cp = ln(censuspop1990_cp) - ln(censuspop1982_cp)
gen D_censuspop9000_cp = ln(censuspop2000_cp) - ln(censuspop1990_cp)
gen D_censuspop9005_cp = ln(censuspop2005_cp) - ln(censuspop1990_cp)
gen D_censuspop9010_cp = ln(censuspop2010_cp) - ln(censuspop1990_cp)
gen D_censuspop8210_cp = ln(censuspop2010_cp) - ln(censuspop1982_cp)

gen D_censuspop0010_pref = ln(censuspop2010_pref) - ln(censuspop2000_pref)
gen D_censuspop8290_pref = ln(censuspop1990_pref) - ln(censuspop1982_pref)
gen D_censuspop9005_pref = ln(censuspop2005_pref) - ln(censuspop1990_pref)
gen D_censuspop9000_pref = ln(censuspop2000_pref) - ln(censuspop1990_pref)
gen D_censuspop9010_pref = ln(censuspop2010_pref) - ln(censuspop1990_pref)
gen D_censuspop8210_pref = ln(censuspop2010_pref) - ln(censuspop1982_pref)

* --- Change in share variables (cp growth minus pref growth) ---
gen D_share9010 = D_censuspop9010_cp - D_censuspop9010_pref
* NOTE(review): this subtracts pref POPULATION growth from cp sector 2 GDP
* growth; confirm this is intended rather than D_gdpsect2_9010_pref.
gen D_gdpsh9010 = D_gdpsect2_9010_cp - D_censuspop9010_pref
gen D_gdpshg9010 = D_gdpsect2_9010_cp - D_gdp9010_pref

* --- Growth in road and rail (rail uses ln(x + 1)) ---
gen D_rail_km9005_pref = ln(rail_2005_km_pf05 + 1) - ln(rail_1990_km_pf05 + 1)


* The 1999 rail and road measures stand in for the year-2000 end point.
gen D_rail_km9000_pref = ln(rail_1999_km_pf05 + 1) - ln(rail_1990_km_pf05 + 1)


gen D_all_road_km9005_pref = ln(all_road_2005_km_pf05) - ln(road_1990_km_pf05)
gen D_all_road_rays9005 = ln(all_road_2005_rays) - ln(road_1990_rays)

gen D_all_road_km9000_pref = ln(all_road_1999_km_pf05) - ln(road_1990_km_pf05)

***************************************************************************************************
* Employment composition and employment growth variables
****************************************************************************************************

* --- 1982: log manufacturing employment, manufacturing share of employment,
* and the cp share of pref manufacturing employment ---
gen lempman1982_cp = log(c_emp_man_cp90a1982)
gen lempman1982_pref = log(c_emp_man_prefa1982)
gen share_emp_man1982_cp=c_emp_man_cp90a1982/c_emp_cp90a1982
gen share_emp_man1982_pref=c_emp_man_prefa1982/c_emp_prefa1982
gen sh_emp_man1982_cppref = c_emp_man_cp90a1982/c_emp_man_prefa1982

* 1982 pref shares of employment in ag, and in ag plus min.
gen share_emp_ag1982_pref=c_emp_ag_prefa1982/c_emp_prefa1982
gen share_emp_agmin1982_pref=(c_emp_ag_prefa1982+c_emp_min_prefa1982)/c_emp_prefa1982

* --- 1990: same shares built from census items a387 and a381 ---
gen share_emp_man1990_cp=a3871990_cp/a3811990_cp
gen share_emp_man1990_pref=a3871990_pref/a3811990_pref
gen sh_emp_man1990_cppref = a3871990_cp/a3871990_pref

* Employment levels for 1990 and 2010.
* NOTE(review): the 2010 values are multiplied by 10, presumably to scale a
* 10 percent sample up to a full count - confirm against the data source.
gen emp1990_cp=a3811990_cp
gen emp2010_cp=a381_cp90a2010*10
gen emp1990_pref=a3811990_pref
gen emp2010_pref=a381_prefa2010*10

gen emp_man1990_cp=a3871990_cp
gen emp_man2010_cp=aN6005_cp90a2010*10
gen emp_man1990_pref=a3871990_pref
gen emp_man2010_pref=aN6005_prefa2010*10

* Log growth 1990-2010.
gen D_emp9010_cp=ln(emp2010_cp)-ln(emp1990_cp)
gen D_emp9010_pref=ln(emp2010_pref)-ln(emp1990_pref)

gen D_emp_man9010_cp=ln(emp_man2010_cp)-ln(emp_man1990_cp)
gen D_emp_man9010_pref=ln(emp_man2010_pref)-ln(emp_man1990_pref)

* 2000 pref manufacturing employment.
* FIX: this pref-level variable used to be filled from the cp-level series
* c_emp_man_cp90a2000, so D_emp_man9000_pref compared a cp numerator with a
* pref denominator. Use the pref series when the data set carries it;
* otherwise keep the previous source so the script still runs, and say so.
capture confirm numeric variable c_emp_man_prefa2000, exact
if _rc == 0 {
    gen emp_man2000_pref=c_emp_man_prefa2000
}
else {
    display as error "c_emp_man_prefa2000 not found: emp_man2000_pref falls back to the cp-level c_emp_man_cp90a2000"
    gen emp_man2000_pref=c_emp_man_cp90a2000
}

gen D_emp_man9000_pref=log(emp_man2000_pref)-log(emp_man1990_pref)



**************************************************************************************************************************************************
* Variables built from manufacturing census data
******************************************************************************



* Totals at the cp level, 2010 versus 1990. The 2010 total adds items aN6004
* through aN6007; the 1990 total is item a387. The rest is a381 minus that total.
* NOTE(review): unlike emp2010_cp and emp_man2010_cp, the 2010 counts here are
* not multiplied by 10 - confirm that this difference is intended.
gen manuempT2010_cp = aN6004_cp90a2010 + aN6005_cp90a2010 + aN6006_cp90a2010 + aN6007_cp90a2010
gen nonmanuempT2010_cp = a381_cp90a2010 - manuempT2010_cp
gen manuempT1990_cp = a3871990_cp
gen nonmanuempT1990_cp = a3811990_cp - manuempT1990_cp
gen D_manuempT9010_cp = ln(manuempT2010_cp + 1) - ln(manuempT1990_cp + 1)
gen D_nonmanuempT9010_cp = ln(nonmanuempT2010_cp + 1) - ln(nonmanuempT1990_cp + 1)


* Per-industry employment: ln(x + 1) growth between 1995 and 2008 and the
* ln(x + 1) level in 1995, at the cp and pref levels.
foreach ind in  food trad txtlaprl plstc nonmtl metals metal metalpdct machinery cptNinst ht nonht aprl eduspt geq seq trans ptran eeq comm inst art lghttrad hvytrad plstcNrbbr allhvy allmdm {

    gen D_manuemp`ind'9508_cp = ln(`ind'_emp2008_cp90 + 1) - ln(`ind'_emp1995_cp90 + 1)
    gen D_manuemp`ind'9508_pref = ln(`ind'_emp2008_pref + 1) - ln(`ind'_emp1995_pref + 1)
    gen lmanuemp`ind'1995_cp = ln(`ind'_emp1995_cp90 + 1)
    gen lmanuemp`ind'1995_pref = ln(`ind'_emp1995_pref + 1)
}

* The same four variables for the unit_emp series, stored under the T suffix.
gen D_manuempT9508_cp = ln(unit_emp2008_cp90 + 1) - ln(unit_emp1995_cp90 + 1)
gen D_manuempT9508_pref = ln(unit_emp2008_pref + 1) - ln(unit_emp1995_pref + 1)
gen lmanuempT1995_cp = ln(unit_emp1995_cp90 + 1)
gen lmanuempT1995_pref = ln(unit_emp1995_pref + 1)

***************************************************************************************
* Sample selection index D0 (1 = keep, 0 = drop), then save the result
******************************************************************************
* D0 is 0 when any of the following holds, and 1 otherwise:
*   - fully rural prefectures (unpromoted_ccity1990 == 1)
*   - prefectures whose core cities coincide with prefecture boundaries
*     (cc05 371200, 620200 or 420700)
*   - core cities that are too small in 1990 (a1171990_cp below 50000)
* A missing a1171990_cp does not satisfy the size condition, because Stata
* treats missing as larger than any number, so such rows keep D0 = 1.
* NOTE(review): confirm that keeping those rows is intended.
gen D0 = !(unpromoted_ccity1990 == 1 | inlist(cc05, 371200, 620200, 420700) | a1171990_cp < 50000)


* Write the prepared data set, overwriting any existing file.
save china_decent_regs.dta, replace
